library(ggplot2)
library(vcfR)
library(adegenet)
library(adegraphics)
library(pegas)
library(StAMPP)
library(lattice)
library(gplots)
library(ape)
library(ggmap)
Read in VCF file with
full.ery.vcf <- read.vcfR("~/Dropbox/full.ery/populations.snps.vcf") #read in all data
## Scanning file to determine attributes.
## File attributes:
## meta lines: 14
## header_line: 15
## variant count: 118607
## column count: 65
##
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 118607
## Character matrix gt cols: 65
## skip: 0
## nrows: 118607
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant: 118607
## All variants processed
head(full.ery.vcf) #check the vcf object
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.2"
## [1] "##fileDate=20190602"
## [1] "##source=\"Stacks v2.3b\""
## [1] "##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total Depth for Each Allele\">"
## [1] "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">"
## [1] "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"
## [1] "First 6 rows."
## [1]
## [1] "***** Fixed section *****"
## CHROM POS ID REF ALT QUAL FILTER
## [1,] "chr1" "727446" "20:72:-" "G" "A" NA "PASS"
## [2,] "chr1" "727477" "20:41:-" "T" "A" NA "PASS"
## [3,] "chr1" "727563" "19:144:+" "G" "T" NA "PASS"
## [4,] "chr1" "727631" "22:21:+" "T" "C" NA "PASS"
## [5,] "chr1" "727658" "22:48:+" "T" "A" NA "PASS"
## [6,] "chr1" "727665" "22:55:+" "A" "G" NA "PASS"
## [1]
## [1] "***** Genotype section *****"
## FORMAT E_papuana_12155 E_papuana_12856 E_papuana_14618
## [1,] "GT:DP:AD:GQ:GL" NA NA NA
## [2,] "GT:DP:AD:GQ:GL" NA NA NA
## [3,] "GT:DP:AD:GQ:GL" NA NA NA
## [4,] "GT:DP:AD:GQ:GL" NA NA NA
## [5,] "GT:DP:AD:GQ:GL" NA NA NA
## [6,] "GT:DP:AD:GQ:GL" NA NA NA
## E_papuana_14621 E_papuana_14622
## [1,] NA NA
## [2,] NA NA
## [3,] NA NA
## [4,] NA "1/1:3:0,3:20:-8.92,-1.51,-0.01"
## [5,] NA "0/0:3:3,0:31:-0.00,-2.54,-10.83"
## [6,] NA "0/0:3:3,0:30:-0.00,-2.44,-10.67"
## [1] "First 6 columns only."
## [1]
## [1] "Unique GT formats:"
## [1] "GT:DP:AD:GQ:GL"
## [1]
full.ery.vcf@fix[1:10,1:5] #check
## CHROM POS ID REF ALT
## [1,] "chr1" "727446" "20:72:-" "G" "A"
## [2,] "chr1" "727477" "20:41:-" "T" "A"
## [3,] "chr1" "727563" "19:144:+" "G" "T"
## [4,] "chr1" "727631" "22:21:+" "T" "C"
## [5,] "chr1" "727658" "22:48:+" "T" "A"
## [6,] "chr1" "727665" "22:55:+" "A" "G"
## [7,] "chr1" "727730" "22:120:+" "G" "A"
## [8,] "chr1" "727733" "22:123:+" "G" "A"
## [9,] "chr1" "727742" "22:132:+" "G" "A"
## [10,] "chr1" "727821" "27:8:+" "G" "T"
#quick check read depth distribution per individual
dp <- extract.gt(full.ery.vcf, element='DP', as.numeric=TRUE)
#pdf("DP_RAD_data.pdf", width = 10, height=3) # boxplot
par(mar=c(8,4,1,1))
boxplot(dp, las=3, col=c("#C0C0C0", "#808080"), ylab="Read Depth (DP)",
las=2, cex=0.4, cex.axis=0.5)
#dev.off()
#zoom to smaller values
#pdf("DP_RAD_data_zoom.pdf", width = 10, height=3) # boxplot
par(mar=c(8,4,1,1))
boxplot(dp, las=3, col=c("#C0C0C0", "#808080"), ylab="Read Depth (DP)",
las=2, cex=0.4, cex.axis=0.5, ylim=c(0,50))
abline(h=8, col="red")
#dev.off()
### convert to genlight
full.ery.genlight <- vcfR2genlight(full.ery.vcf, n.cores=1)
#locNames(aa.genlight) <- paste(vcf@fix[,1],vcf@fix[,2],sep="_") # add real SNP.names
#SET NAMES HERE
# add popnames: here "population" (group) names are chars 5,6,7 of ind name
pop(full.ery.genlight)<-substr(indNames(full.ery.genlight),3,5)
# check the genlight
full.ery.genlight # check the basic info on the genlight object
## /// GENLIGHT OBJECT /////////
##
## // 56 genotypes, 118,607 binary SNPs, size: 18.3 Mb
## 1964815 (29.58 %) missing data
##
## // Basic content
## @gen: list of 56 SNPbin
##
## // Optional content
## @ind.names: 56 individual labels
## @loc.names: 118607 locus labels
## @chromosome: factor storing chromosomes of the SNPs
## @position: integer storing positions of the SNPs
## @pop: population of each individual (group size range: 4-39)
## @other: a list containing: elements without names
indNames(full.ery.genlight) # check individual names
## [1] "E_papuana_12155" "E_papuana_12856" "E_papuana_14618"
## [4] "E_papuana_14621" "E_papuana_14622" "E_papuana_14624"
## [7] "E_papuana_16064" "E_papuana_16434" "E_trichroa_4702"
## [10] "E_trichroa_4766" "E_trichroa_4772" "E_trichroa_4775"
## [13] "E_trichroa_12043" "E_trichroa_12054" "E_trichroa_12061"
## [16] "E_trichroa_12062" "E_trichroa_12254" "E_trichroa_12301"
## [19] "E_trichroa_12846" "E_trichroa_16071" "E_trichroa_16075"
## [22] "E_trichroa_16086" "E_trichroa_16164" "E_trichroa_16234"
## [25] "E_trichroa_16248" "E_trichroa_16315" "E_trichroa_16548"
## [28] "E_trichroa_18292" "E_trichroa_18295" "E_trichroa_18320"
## [31] "E_trichroa_18327" "E_trichroa_18389" "E_trichroa_18401"
## [34] "E_trichroa_27881" "E_trichroa_27882" "E_trichroa_23619"
## [37] "E_trichroa_B32623" "E_trichroa_B52739" "E_trichroa_34978"
## [40] "E_trichroa_35027" "E_trichroa_32805" "E_trichroa_32819"
## [43] "E_trichroa_DOT-209" "E_trichroa_32834" "E_trichroa_32797"
## [46] "E_trichroa_32838" "E_trichroa_32757" "E_coloria_28439"
## [49] "E_coloria_28562" "E_coloria_28408" "E_coloria_28582"
## [52] "E_pealii_26533" "E_pealii_22532" "E_pealii_24275"
## [55] "E_pealii_22533" "E_pealii_22531"
# N missing SNPs per sample
x <- summary(t(as.matrix(full.ery.genlight)))
individs<-colnames(x)
num.missing.snps<-x[7,]
#create DF with missing snp info
reffed.missing.snps<-data.frame(individs, num.missing.snps)
rownames(reffed.missing.snps) <- NULL
reffed.missing.snps$num.missing.snps <- gsub("NA's :", "", reffed.missing.snps$num.missing.snps)
reffed.missing.snps$num.missing.snps<- as.numeric(reffed.missing.snps$num.missing.snps)
reffed.missing.snps$miss.percentage<-(reffed.missing.snps$num.missing.snps/full.ery.genlight$n.loc)
reffed.missing.snps
## individs num.missing.snps miss.percentage
## 1 E_papuana_12155 49853 0.4203209
## 2 E_papuana_12856 23730 0.2000725
## 3 E_papuana_14618 40805 0.3440353
## 4 E_papuana_14621 24279 0.2047012
## 5 E_papuana_14622 19323 0.1629162
## 6 E_papuana_14624 27074 0.2282665
## 7 E_papuana_16064 18578 0.1566349
## 8 E_papuana_16434 20405 0.1720387
## 9 E_trichroa_4702 114340 0.9640240
## 10 E_trichroa_4766 34807 0.2934650
## 11 E_trichroa_4772 57289 0.4830153
## 12 E_trichroa_4775 26776 0.2257540
## 13 E_trichroa_12043 36349 0.3064659
## 14 E_trichroa_12054 27842 0.2347416
## 15 E_trichroa_12061 27237 0.2296407
## 16 E_trichroa_12062 26611 0.2243628
## 17 E_trichroa_12254 27101 0.2284941
## 18 E_trichroa_12301 46557 0.3925316
## 19 E_trichroa_12846 27991 0.2359979
## 20 E_trichroa_16071 27746 0.2339322
## 21 E_trichroa_16075 61174 0.5157706
## 22 E_trichroa_16086 27241 0.2296745
## 23 E_trichroa_16164 28166 0.2374733
## 24 E_trichroa_16234 33514 0.2825634
## 25 E_trichroa_16248 32827 0.2767712
## 26 E_trichroa_16315 27214 0.2294468
## 27 E_trichroa_16548 28491 0.2402135
## 28 E_trichroa_18292 30633 0.2582731
## 29 E_trichroa_18295 35247 0.2971747
## 30 E_trichroa_18320 39314 0.3314644
## 31 E_trichroa_18327 37198 0.3136240
## 32 E_trichroa_18389 33087 0.2789633
## 33 E_trichroa_18401 33059 0.2787272
## 34 E_trichroa_27881 31942 0.2693096
## 35 E_trichroa_27882 85162 0.7180183
## 36 E_trichroa_23619 29086 0.2452300
## 37 E_trichroa_B32623 73557 0.6201742
## 38 E_trichroa_B52739 24183 0.2038918
## 39 E_trichroa_34978 25581 0.2156787
## 40 E_trichroa_35027 30743 0.2592006
## 41 E_trichroa_32805 30602 0.2580118
## 42 E_trichroa_32819 29681 0.2502466
## 43 E_trichroa_DOT-209 46619 0.3930544
## 44 E_trichroa_32834 25308 0.2133770
## 45 E_trichroa_32797 27547 0.2322544
## 46 E_trichroa_32838 24927 0.2101647
## 47 E_trichroa_32757 30639 0.2583237
## 48 E_coloria_28439 29054 0.2449602
## 49 E_coloria_28562 37883 0.3193994
## 50 E_coloria_28408 42626 0.3593886
## 51 E_coloria_28582 29101 0.2453565
## 52 E_pealii_26533 34219 0.2885074
## 53 E_pealii_22532 40336 0.3400811
## 54 E_pealii_24275 26597 0.2242448
## 55 E_pealii_22533 29463 0.2484086
## 56 E_pealii_22531 28101 0.2369253
Three samples are missing over 50% of loci, all of which are trichroa. We will drop them.
# create new genlight using this selection
#drop the three samples that are over 50% missing SNPs
filtered.full.ery <- new("genlight",
(as.matrix(full.ery.genlight)[c(1:8,10:34,36,38:56), ]))
indNames(filtered.full.ery) # check individual names
## [1] "E_papuana_12155" "E_papuana_12856" "E_papuana_14618"
## [4] "E_papuana_14621" "E_papuana_14622" "E_papuana_14624"
## [7] "E_papuana_16064" "E_papuana_16434" "E_trichroa_4766"
## [10] "E_trichroa_4772" "E_trichroa_4775" "E_trichroa_12043"
## [13] "E_trichroa_12054" "E_trichroa_12061" "E_trichroa_12062"
## [16] "E_trichroa_12254" "E_trichroa_12301" "E_trichroa_12846"
## [19] "E_trichroa_16071" "E_trichroa_16075" "E_trichroa_16086"
## [22] "E_trichroa_16164" "E_trichroa_16234" "E_trichroa_16248"
## [25] "E_trichroa_16315" "E_trichroa_16548" "E_trichroa_18292"
## [28] "E_trichroa_18295" "E_trichroa_18320" "E_trichroa_18327"
## [31] "E_trichroa_18389" "E_trichroa_18401" "E_trichroa_27881"
## [34] "E_trichroa_23619" "E_trichroa_B52739" "E_trichroa_34978"
## [37] "E_trichroa_35027" "E_trichroa_32805" "E_trichroa_32819"
## [40] "E_trichroa_DOT-209" "E_trichroa_32834" "E_trichroa_32797"
## [43] "E_trichroa_32838" "E_trichroa_32757" "E_coloria_28439"
## [46] "E_coloria_28562" "E_coloria_28408" "E_coloria_28582"
## [49] "E_pealii_26533" "E_pealii_22532" "E_pealii_24275"
## [52] "E_pealii_22533" "E_pealii_22531"
Here we make a 50% complete data matrix with 93,861 SNPs And a 90% complete data matrix with 53,426 SNPs
#50
fiftypercent.filtered.full.ery <- new("genlight", (as.matrix(filtered.full.ery))
[,(colSums(is.na (as.matrix(filtered.full.ery))) < 27)])
fiftypercent.filtered.full.ery$n.loc
## [1] 93861
fiftypercent.filtered.full.ery$ind.names
## [1] "E_papuana_12155" "E_papuana_12856" "E_papuana_14618"
## [4] "E_papuana_14621" "E_papuana_14622" "E_papuana_14624"
## [7] "E_papuana_16064" "E_papuana_16434" "E_trichroa_4766"
## [10] "E_trichroa_4772" "E_trichroa_4775" "E_trichroa_12043"
## [13] "E_trichroa_12054" "E_trichroa_12061" "E_trichroa_12062"
## [16] "E_trichroa_12254" "E_trichroa_12301" "E_trichroa_12846"
## [19] "E_trichroa_16071" "E_trichroa_16075" "E_trichroa_16086"
## [22] "E_trichroa_16164" "E_trichroa_16234" "E_trichroa_16248"
## [25] "E_trichroa_16315" "E_trichroa_16548" "E_trichroa_18292"
## [28] "E_trichroa_18295" "E_trichroa_18320" "E_trichroa_18327"
## [31] "E_trichroa_18389" "E_trichroa_18401" "E_trichroa_27881"
## [34] "E_trichroa_23619" "E_trichroa_B52739" "E_trichroa_34978"
## [37] "E_trichroa_35027" "E_trichroa_32805" "E_trichroa_32819"
## [40] "E_trichroa_DOT-209" "E_trichroa_32834" "E_trichroa_32797"
## [43] "E_trichroa_32838" "E_trichroa_32757" "E_coloria_28439"
## [46] "E_coloria_28562" "E_coloria_28408" "E_coloria_28582"
## [49] "E_pealii_26533" "E_pealii_22532" "E_pealii_24275"
## [52] "E_pealii_22533" "E_pealii_22531"
#90
ninetypercent.filtered.full.ery <- new("genlight", (as.matrix(filtered.full.ery))
[,(colSums(is.na (as.matrix(filtered.full.ery))) < 6)])
ninetypercent.filtered.full.ery$n.loc
## [1] 53426
ninetypercent.filtered.full.ery$ind.names
## [1] "E_papuana_12155" "E_papuana_12856" "E_papuana_14618"
## [4] "E_papuana_14621" "E_papuana_14622" "E_papuana_14624"
## [7] "E_papuana_16064" "E_papuana_16434" "E_trichroa_4766"
## [10] "E_trichroa_4772" "E_trichroa_4775" "E_trichroa_12043"
## [13] "E_trichroa_12054" "E_trichroa_12061" "E_trichroa_12062"
## [16] "E_trichroa_12254" "E_trichroa_12301" "E_trichroa_12846"
## [19] "E_trichroa_16071" "E_trichroa_16075" "E_trichroa_16086"
## [22] "E_trichroa_16164" "E_trichroa_16234" "E_trichroa_16248"
## [25] "E_trichroa_16315" "E_trichroa_16548" "E_trichroa_18292"
## [28] "E_trichroa_18295" "E_trichroa_18320" "E_trichroa_18327"
## [31] "E_trichroa_18389" "E_trichroa_18401" "E_trichroa_27881"
## [34] "E_trichroa_23619" "E_trichroa_B52739" "E_trichroa_34978"
## [37] "E_trichroa_35027" "E_trichroa_32805" "E_trichroa_32819"
## [40] "E_trichroa_DOT-209" "E_trichroa_32834" "E_trichroa_32797"
## [43] "E_trichroa_32838" "E_trichroa_32757" "E_coloria_28439"
## [46] "E_coloria_28562" "E_coloria_28408" "E_coloria_28582"
## [49] "E_pealii_26533" "E_pealii_22532" "E_pealii_24275"
## [52] "E_pealii_22533" "E_pealii_22531"
pop(ninetypercent.filtered.full.ery)<-substr(indNames(ninetypercent.filtered.full.ery),3,5)
pop(fiftypercent.filtered.full.ery)<-substr(indNames(fiftypercent.filtered.full.ery),3,5)
Make a PCA of all retained individuals, with both data matrixes. Only 90% complete matrix shown here for computational efficiency, but both run locally and PCAs are identical.
#pca with all snps
pca.1 <- glPca(ninetypercent.filtered.full.ery, nf=5) # retain first 300 axes (for later use in find.clusters); slow function
#pca.2 <- glPca(fiftypercent.filtered.full.ery, nf=5) # retain first 300 axes (for later use in find.clusters); slow function
#quick plot
plot(pca.1$scores[,1], pca.1$scores[,2])
#plot(pca.2$scores[,1], pca.2$scores[,2])
#pull pca scores out of df
pca.scores<-as.data.frame(pca.1$scores)
#pca.scores2<-as.data.frame(pca.2$scores)
#ggplot color by species
ggplot(pca.scores, aes(x=PC1, y=PC2, color=pop(ninetypercent.filtered.full.ery))) +
geom_point(cex = 2)
#ggplot(pca.scores2, aes(x=PC1, y=PC2, color=pop(ninetypercent.filtered.full.ery))) +
geom_point(cex = 2)
## geom_point: na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
#ggplot color by species
ggplot(pca.scores, aes(x=PC3, y=PC4, color=pop(ninetypercent.filtered.full.ery))) +
geom_point(cex = 2)
Calculate Fst and Nei’s Distance between all of the species in our dataset (papuana, trichroa, coloria, pealii)
### Calculate Nei's distances between individuals/pops
# Nei's 1972 distance between indivs
ery.D.ind.90 <- stamppNeisD(ninetypercent.filtered.full.ery, pop = FALSE)
ery.D.ind.50 <- stamppNeisD(fiftypercent.filtered.full.ery, pop = FALSE)
# exportmatrix - for SplitsTree
#stamppPhylip(todi.D.ind, file="~/Desktop/Todiramphus/reffed.medfilt.todi.indiv_Neis_distance.phy.dst")
# Nei's 1972 distance between pops
ery.D.pop.90 <- stamppNeisD(ninetypercent.filtered.full.ery, pop = TRUE)
ery.D.pop.50 <- stamppNeisD(fiftypercent.filtered.full.ery, pop = TRUE)
# export
#stamppPhylip(todi.D.pop, file="~/Desktop/Todiramphus/reffed.medfilt.todi.pops_Neis_distance.phy.dst")
### Calculate pairwise Fst among populations
ninetypercent.filtered.full.ery@ploidy <- as.integer(ploidy(ninetypercent.filtered.full.ery))
fiftypercent.filtered.full.ery@ploidy <- as.integer(ploidy(fiftypercent.filtered.full.ery))
#
ery.fst.90<-stamppFst(ninetypercent.filtered.full.ery, nboots = 1, percent =95, nclusters=5)
ery.fst.50<-stamppFst(fiftypercent.filtered.full.ery, nboots = 1, percent =95, nclusters=5)
#modify the matrix for opening in SplitsTree
ery.fst.90
## pap tri col pea
## pap NA NA NA NA
## tri 0.00589417 NA NA NA
## col 0.45864683 0.3787445 NA NA
## pea 0.57304560 0.5200281 0.7947378 NA
ery.fst.50
## pap tri col pea
## pap NA NA NA NA
## tri 0.006807597 NA NA NA
## col 0.439857644 0.3564122 NA NA
## pea 0.551214028 0.4875244 0.7931585 NA
### heatmap of the indivs distance matrix
colnames(ery.D.ind.90) <- rownames(ery.D.ind.90)
colnames(ery.D.ind.50) <- rownames(ery.D.ind.50)
#pdf(file="~/Desktop/Todiramphus/medfilt.reffed.Neis_dist_heatmap.pdf", width=10, height=10)
#50% complete matrix
heatmap.2(ery.D.ind.50, trace="none", cexRow=0.4, cexCol=0.4)
#90% complete matrix
heatmap.2(ery.D.ind.90, trace="none", cexRow=0.4, cexCol=0.4)
#23619 is the Palau individual which comes out as sister to all of tri/pap
#32805 & DOT-209 are from Guadalcanal and Kolombangara, and are the only differentiated tri/pap
#dev.off()
# plot unrooted NJ tree
#50% complete matrix
plot(nj(ery.D.ind.50), type = "unrooted", cex = .5)
#90% complete matrix
plot(nj(ery.D.ind.90), type = "unrooted", cex = .5)
We will now make a subset dataset with only papuana and trichroa, and identify the trichroa by geography
#drop down to just papuana/trichroa to look closely for divergence
ninety.pap.tri.full.ery <- new("genlight",
(as.matrix(ninetypercent.filtered.full.ery)[c(1:44), ]))
pop(ninety.pap.tri.full.ery)<-c("pap","pap","pap","pap","pap","pap","pap","pap","png.tri","png.tri","png.tri","png.tri"
,"png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri"
,"png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri","png.tri"
,"png.tri","png.tri","png.tri","png.tri","png.tri","palau.tri","aus.tri","sols.makira.tri"
,"sols.makira.tri","sols.guadalcanal.tri","sols.guadalcanal.tri","sols.kolombangara.tri","sols.tri"
,"sols.guadalcanal.tri","sols.guadalcanal.tri","sols.malaita.tri")
Run PCA of only trichroa/papuana
#pca with all snps
pap.tri.pca.90 <- glPca(ninety.pap.tri.full.ery, nf=6) # retain first 300 axes (for later use in find.clusters); slow function
#pull pca scores out of df
pap.tri.pca.scores.90<-as.data.frame(pap.tri.pca.90$scores)
#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC1, y=PC2, color=pop(ninety.pap.tri.full.ery))) +
geom_point(cex = 2)
#guadalcanal and kolombangara trichroa separate out on PC2
#Papuana, and trichroa from PNG, makira, malaita, and australia all cluster on PC 1&2
#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC3, y=PC4, color=pop(ninety.pap.tri.full.ery))) +
geom_point(cex = 2)
#ggplot color by species
ggplot(pap.tri.pca.scores.90, aes(x=PC5, y=PC6, color=pop(ninety.pap.tri.full.ery))) +
geom_point(cex = 2)
Run full population structure analysis and make NJ tree for just papuana/trichroa
### Calculate Nei's distances between individuals/pops
# Nei's 1972 distance between indivs
pap.tri.ery.D.ind.90 <- stamppNeisD(ninety.pap.tri.full.ery, pop = FALSE)
# exportmatrix - for SplitsTree
#stamppPhylip(todi.D.ind, file="~/Desktop/Todiramphus/reffed.medfilt.todi.indiv_Neis_distance.phy.dst")
# Nei's 1972 distance between pops
pap.tri.ery.D.pop.90 <- stamppNeisD(ninety.pap.tri.full.ery, pop = TRUE)
# export
#stamppPhylip(todi.D.pop, file="~/Desktop/Todiramphus/reffed.medfilt.todi.pops_Neis_distance.phy.dst")
### Calculate pairwise Fst among populations
ninety.pap.tri.full.ery@ploidy <- as.integer(ploidy(ninety.pap.tri.full.ery))
#
pap.tri.ery.fst.90<-stamppFst(ninety.pap.tri.full.ery, nboots = 1, percent =95, nclusters=5)
#modify the matrix for opening in SplitsTree
pap.tri.ery.fst.90
## pap png.tri palau.tri aus.tri
## pap NA NA NA NA
## png.tri 0.007426551 NA NA NA
## palau.tri 0.235497051 0.208606072 NA NA
## aus.tri 0.044974882 0.021747500 NaN NA
## sols.makira.tri 0.010918219 -0.001794072 0.2383701 0.04339528
## sols.guadalcanal.tri 0.018612105 0.008956574 0.2179659 0.03111431
## sols.kolombangara.tri 0.069754533 0.050431976 NaN NaN
## sols.tri 0.003267231 -0.016928971 NaN NaN
## sols.malaita.tri 0.011999379 -0.011223495 NaN NaN
## sols.makira.tri sols.guadalcanal.tri
## pap NA NA
## png.tri NA NA
## palau.tri NA NA
## aus.tri NA NA
## sols.makira.tri NA NA
## sols.guadalcanal.tri 0.0093390200 NA
## sols.kolombangara.tri 0.0609972159 0.017726608
## sols.tri -0.0002754384 -0.011301170
## sols.malaita.tri 0.0013947928 0.004669284
## sols.kolombangara.tri sols.tri sols.malaita.tri
## pap NA NA NA
## png.tri NA NA NA
## palau.tri NA NA NA
## aus.tri NA NA NA
## sols.makira.tri NA NA NA
## sols.guadalcanal.tri NA NA NA
## sols.kolombangara.tri NA NA NA
## sols.tri NaN NA NA
## sols.malaita.tri NaN NaN NA
#palau trichroa ~.2 Fst with all other groups
#PNG trichroa / papuana = .007 Fst
### heatmap of the indivs distance matrix
colnames(pap.tri.ery.D.ind.90) <- rownames(pap.tri.ery.D.ind.90)
#pdf(file="~/Desktop/Todiramphus/medfilt.reffed.Neis_dist_heatmap.pdf", width=10, height=10)
heatmap.2(pap.tri.ery.D.ind.90, trace="none", cexRow=0.4, cexCol=0.4)
#23619 is the Palau individual which comes out as sister to all of tri/pap
#32805 & DOT-209 are from Guadalcanal and Kolombangara, and are the only differentiated tri/pap
#dev.off()
# plot unrooted NJ tree
plot(nj(pap.tri.ery.D.ind.90), type = "unrooted", cex = .5)
The only individual that is different enough to be separated in the NJ tree is the Palau bird
read in stacks populations Fst calculation between papuana and trichroa from PNG
full.ery.pap.ref.fst<-read.delim(file = "~/Dropbox/full.ery/populations.fst_papuana-trichroa.tsv")
mean(full.ery.pap.ref.fst$AMOVA.Fst)
## [1] 0.02466679
#avg. Fst = .025
clean chromosomes up
#combine unmapped chromosomes
levels(full.ery.pap.ref.fst$Chr)
## [1] "chr1" "chr1_ABQF01066038_random"
## [3] "chr1_ABQF01112931_random" "chr1_ABQF01116611_random"
## [5] "chr1_EQ832370_random" "chr1_EQ832384_random"
## [7] "chr1_EQ832422_random" "chr10"
## [9] "chr10_ABQF01085055_random" "chr10_EQ832874_random"
## [11] "chr10_EQ832893_random" "chr11_EQ832924_random"
## [13] "chr12_EQ832950_random" "chr12_KB442361_random"
## [15] "chr13" "chr13_EQ832957_random"
## [17] "chr13_EQ832959_random" "chr13_EQ832971_random"
## [19] "chr13_EQ832995_random" "chr14_EQ833009_random"
## [21] "chr15_EQ833041_random" "chr17_EQ833050_random"
## [23] "chr18_KB442395_random" "chr1A"
## [25] "chr1A_ABQF01112188_random" "chr1A_EQ832472_random"
## [27] "chr2" "chr2_ABQF01062723_random"
## [29] "chr2_ABQF01082098_random" "chr2_ABQF01114633_random"
## [31] "chr2_EQ832482_random" "chr2_EQ832516_random"
## [33] "chr2_EQ832538_random" "chr2_EQ832546_random"
## [35] "chr2_EQ832581_random" "chr2_KB442204_random"
## [37] "chr2_KB442221_random" "chr2_KB442223_random"
## [39] "chr2_KB442240_random" "chr20"
## [41] "chr21" "chr21_EQ833135_random"
## [43] "chr21_EQ833162_random" "chr22_EQ833189_random"
## [45] "chr24" "chr26"
## [47] "chr26_EQ833310_random" "chr27"
## [49] "chr27_EQ833329_random" "chr3_EQ832612_random"
## [51] "chr3_EQ832614_random" "chr3_EQ832615_random"
## [53] "chr4" "chr4_ABQF01084713_random"
## [55] "chr4_EQ832640_random" "chr4_EQ832641_random"
## [57] "chr4_EQ832678_random" "chr4_KB442255_random"
## [59] "chr4A" "chr4A_EQ832701_random"
## [61] "chr5" "chr5_EQ832721_random"
## [63] "chr5_EQ832723_random" "chr5_KB442299_random"
## [65] "chr6" "chr6_ABQF01081686_random"
## [67] "chr6_EQ832758_random" "chr6_EQ832760_random"
## [69] "chr7" "chr7_EQ832789_random"
## [71] "chr7_EQ832795_random" "chr7_EQ832809_random"
## [73] "chr8_EQ832817_random" "chr8_EQ832818_random"
## [75] "chr8_EQ832819_random" "chr8_EQ832820_random"
## [77] "chr9" "chr9_EQ832848_random"
## [79] "chr9_EQ832871_random" "chrM"
## [81] "chrUn_ABQF01069968" "chrUn_ABQF01072490"
## [83] "chrUn_ABQF01072775" "chrUn_ABQF01074087"
## [85] "chrUn_ABQF01074862" "chrUn_ABQF01076994"
## [87] "chrUn_ABQF01082363" "chrUn_ABQF01083155"
## [89] "chrUn_ABQF01084312" "chrUn_ABQF01084477"
## [91] "chrUn_ABQF01085047" "chrUn_ABQF01085632"
## [93] "chrUn_ABQF01085690" "chrUn_ABQF01086385"
## [95] "chrUn_ABQF01087664" "chrUn_ABQF01088159"
## [97] "chrUn_ABQF01088542" "chrUn_ABQF01088767"
## [99] "chrUn_ABQF01089288" "chrUn_ABQF01089330"
## [101] "chrUn_ABQF01090168" "chrUn_ABQF01090310"
## [103] "chrUn_ABQF01090506" "chrUn_ABQF01090592"
## [105] "chrUn_ABQF01090636" "chrUn_ABQF01090689"
## [107] "chrUn_ABQF01090728" "chrUn_ABQF01091169"
## [109] "chrUn_ABQF01091951" "chrUn_ABQF01092022"
## [111] "chrUn_ABQF01092886" "chrUn_ABQF01093283"
## [113] "chrUn_ABQF01093314" "chrUn_ABQF01093495"
## [115] "chrUn_ABQF01093518" "chrUn_ABQF01093969"
## [117] "chrUn_ABQF01094092" "chrUn_ABQF01094352"
## [119] "chrUn_ABQF01094483" "chrUn_ABQF01094551"
## [121] "chrUn_ABQF01094838" "chrUn_ABQF01094949"
## [123] "chrUn_ABQF01095309" "chrUn_ABQF01095342"
## [125] "chrUn_ABQF01095695" "chrUn_ABQF01096525"
## [127] "chrUn_ABQF01096564" "chrUn_ABQF01096760"
## [129] "chrUn_ABQF01097085" "chrUn_ABQF01097702"
## [131] "chrUn_ABQF01097967" "chrUn_ABQF01098234"
## [133] "chrUn_ABQF01098334" "chrUn_ABQF01098429"
## [135] "chrUn_ABQF01098525" "chrUn_ABQF01099770"
## [137] "chrUn_ABQF01099842" "chrUn_ABQF01100862"
## [139] "chrUn_ABQF01101547" "chrUn_ABQF01101833"
## [141] "chrUn_ABQF01101896" "chrUn_ABQF01101953"
## [143] "chrUn_ABQF01102061" "chrUn_ABQF01102198"
## [145] "chrUn_ABQF01102600" "chrUn_ABQF01102862"
## [147] "chrUn_ABQF01102942" "chrUn_ABQF01103002"
## [149] "chrUn_ABQF01103318" "chrUn_ABQF01103343"
## [151] "chrUn_ABQF01103484" "chrUn_ABQF01103548"
## [153] "chrUn_ABQF01103624" "chrUn_ABQF01103699"
## [155] "chrUn_ABQF01103748" "chrUn_ABQF01104522"
## [157] "chrUn_ABQF01104971" "chrUn_ABQF01105060"
## [159] "chrUn_ABQF01105458" "chrUn_ABQF01105601"
## [161] "chrUn_ABQF01106033" "chrUn_ABQF01106178"
## [163] "chrUn_ABQF01106696" "chrUn_ABQF01107089"
## [165] "chrUn_ABQF01107284" "chrUn_ABQF01107436"
## [167] "chrUn_ABQF01107567" "chrUn_ABQF01107768"
## [169] "chrUn_ABQF01108003" "chrUn_ABQF01108366"
## [171] "chrUn_ABQF01108531" "chrUn_ABQF01108733"
## [173] "chrUn_ABQF01109034" "chrUn_ABQF01109100"
## [175] "chrUn_ABQF01109472" "chrUn_ABQF01109688"
## [177] "chrUn_ABQF01109824" "chrUn_ABQF01109867"
## [179] "chrUn_ABQF01110235" "chrUn_ABQF01110524"
## [181] "chrUn_ABQF01111109" "chrUn_ABQF01111384"
## [183] "chrUn_ABQF01111411" "chrUn_ABQF01111513"
## [185] "chrUn_ABQF01111613" "chrUn_ABQF01112217"
## [187] "chrUn_ABQF01112234" "chrUn_ABQF01112450"
## [189] "chrUn_ABQF01113479" "chrUn_ABQF01113531"
## [191] "chrUn_ABQF01113863" "chrUn_ABQF01114204"
## [193] "chrUn_ABQF01114630" "chrUn_ABQF01114934"
## [195] "chrUn_ABQF01115236" "chrUn_ABQF01115605"
## [197] "chrUn_ABQF01115962" "chrUn_ABQF01116227"
## [199] "chrUn_ABQF01116908" "chrUn_ABQF01117067"
## [201] "chrUn_ABQF01117117" "chrUn_ABQF01117267"
## [203] "chrUn_ABQF01117614" "chrUn_ABQF01117766"
## [205] "chrUn_ABQF01117884" "chrUn_ABQF01117920"
## [207] "chrUn_ABQF01118092" "chrUn_ABQF01118500"
## [209] "chrUn_ABQF01118603" "chrUn_ABQF01118704"
## [211] "chrUn_ABQF01118801" "chrUn_ABQF01119183"
## [213] "chrUn_ABQF01119240" "chrUn_ABQF01119696"
## [215] "chrUn_ABQF01119809" "chrUn_ABQF01120003"
## [217] "chrUn_ABQF01120299" "chrUn_ABQF01120431"
## [219] "chrUn_ABQF01121044" "chrUn_ABQF01121238"
## [221] "chrUn_ABQF01122122" "chrUn_ABQF01122402"
## [223] "chrUn_ABQF01122426" "chrUn_ABQF01122670"
## [225] "chrUn_ABQF01123539" "chrUn_EQ833462"
## [227] "chrUn_EQ833563" "chrUn_EQ833616"
## [229] "chrUn_EQ833779" "chrUn_EQ833785"
## [231] "chrUn_EQ833889" "chrUn_EQ834090"
## [233] "chrUn_EQ834133" "chrUn_EQ834330"
## [235] "chrUn_EQ834351" "chrUn_EQ834463"
## [237] "chrUn_EQ834643" "chrUn_EQ834689"
## [239] "chrUn_EQ834704" "chrUn_EQ834832"
## [241] "chrUn_EQ835287" "chrUn_EQ835460"
## [243] "chrUn_EQ835489" "chrUn_EQ835669"
## [245] "chrUn_EQ835754" "chrUn_EQ835845"
## [247] "chrUn_EQ836162" "chrUn_EQ836214"
## [249] "chrUn_EQ836353" "chrUn_EQ836495"
## [251] "chrUn_EQ836545" "chrUn_EQ836621"
## [253] "chrUn_EQ836880" "chrUn_EQ836890"
## [255] "chrUn_EQ837681" "chrUn_EQ837710"
## [257] "chrUn_EQ837776" "chrUn_EQ837931"
## [259] "chrUn_EQ837939" "chrUn_EQ837945"
## [261] "chrUn_EQ837975" "chrUn_EQ838175"
## [263] "chrUn_EQ838367" "chrUn_EQ838374"
## [265] "chrUn_EQ838408" "chrUn_EQ838538"
## [267] "chrUn_EQ838581" "chrUn_EQ838645"
## [269] "chrUn_EQ838780" "chrUn_EQ838860"
## [271] "chrUn_EQ838978" "chrUn_EQ839256"
## [273] "chrUn_EQ839375" "chrUn_EQ839565"
## [275] "chrUn_EQ839602" "chrUn_EQ839651"
## [277] "chrUn_EQ839674" "chrUn_EQ839893"
## [279] "chrUn_EQ839908" "chrUn_EQ840164"
## [281] "chrUn_EQ840277" "chrUn_EQ840364"
## [283] "chrUn_EQ840620" "chrUn_EQ840814"
## [285] "chrUn_EQ841032" "chrUn_EQ841097"
## [287] "chrUn_EQ841536" "chrUn_EQ841545"
## [289] "chrUn_EQ841641" "chrUn_EQ841849"
## [291] "chrUn_EQ841867" "chrUn_EQ842167"
## [293] "chrUn_EQ842220" "chrUn_EQ842299"
## [295] "chrUn_EQ842346" "chrUn_EQ842415"
## [297] "chrUn_EQ842693" "chrUn_EQ842723"
## [299] "chrUn_EQ843139" "chrUn_EQ843173"
## [301] "chrUn_EQ843182" "chrUn_EQ843251"
## [303] "chrUn_EQ843287" "chrUn_EQ843421"
## [305] "chrUn_EQ843562" "chrUn_EQ843614"
## [307] "chrUn_EQ843909" "chrUn_EQ843932"
## [309] "chrUn_EQ843953" "chrUn_EQ844182"
## [311] "chrUn_EQ844202" "chrUn_EQ844241"
## [313] "chrUn_EQ844273" "chrUn_EQ844683"
## [315] "chrUn_EQ844763" "chrUn_EQ844793"
## [317] "chrUn_EQ844849" "chrUn_EQ844879"
## [319] "chrUn_EQ844892" "chrUn_EQ844976"
## [321] "chrUn_EQ844985" "chrUn_EQ845273"
## [323] "chrUn_EQ845365" "chrUn_EQ845449"
## [325] "chrUn_EQ845527" "chrUn_EQ845546"
## [327] "chrUn_EQ845853" "chrUn_EQ845860"
## [329] "chrUn_EQ845884" "chrUn_EQ845902"
## [331] "chrUn_EQ846095" "chrUn_EQ846180"
## [333] "chrZ_EQ833352_random" "chrZ_EQ833358_random"
## [335] "chrZ_EQ833367_random"
full.ery.pap.ref.fst$Chr<-gsub("chrUn_.*", "unk", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
## [1] "chr1" "chr1_ABQF01066038_random"
## [3] "chr1_ABQF01112931_random" "chr1_ABQF01116611_random"
## [5] "chr1_EQ832370_random" "chr1_EQ832384_random"
## [7] "chr1_EQ832422_random" "chr10"
## [9] "chr10_ABQF01085055_random" "chr10_EQ832874_random"
## [11] "chr10_EQ832893_random" "chr11_EQ832924_random"
## [13] "chr12_EQ832950_random" "chr12_KB442361_random"
## [15] "chr13" "chr13_EQ832957_random"
## [17] "chr13_EQ832959_random" "chr13_EQ832971_random"
## [19] "chr13_EQ832995_random" "chr14_EQ833009_random"
## [21] "chr15_EQ833041_random" "chr17_EQ833050_random"
## [23] "chr18_KB442395_random" "chr1A"
## [25] "chr1A_ABQF01112188_random" "chr1A_EQ832472_random"
## [27] "chr2" "chr2_ABQF01062723_random"
## [29] "chr2_ABQF01082098_random" "chr2_ABQF01114633_random"
## [31] "chr2_EQ832482_random" "chr2_EQ832516_random"
## [33] "chr2_EQ832538_random" "chr2_EQ832546_random"
## [35] "chr2_EQ832581_random" "chr2_KB442204_random"
## [37] "chr2_KB442221_random" "chr2_KB442223_random"
## [39] "chr2_KB442240_random" "chr20"
## [41] "chr21" "chr21_EQ833135_random"
## [43] "chr21_EQ833162_random" "chr22_EQ833189_random"
## [45] "chr24" "chr26"
## [47] "chr26_EQ833310_random" "chr27"
## [49] "chr27_EQ833329_random" "chr3_EQ832612_random"
## [51] "chr3_EQ832614_random" "chr3_EQ832615_random"
## [53] "chr4" "chr4_ABQF01084713_random"
## [55] "chr4_EQ832640_random" "chr4_EQ832641_random"
## [57] "chr4_EQ832678_random" "chr4_KB442255_random"
## [59] "chr4A" "chr4A_EQ832701_random"
## [61] "chr5" "chr5_EQ832721_random"
## [63] "chr5_EQ832723_random" "chr5_KB442299_random"
## [65] "chr6" "chr6_ABQF01081686_random"
## [67] "chr6_EQ832758_random" "chr6_EQ832760_random"
## [69] "chr7" "chr7_EQ832789_random"
## [71] "chr7_EQ832795_random" "chr7_EQ832809_random"
## [73] "chr8_EQ832817_random" "chr8_EQ832818_random"
## [75] "chr8_EQ832819_random" "chr8_EQ832820_random"
## [77] "chr9" "chr9_EQ832848_random"
## [79] "chr9_EQ832871_random" "chrM"
## [81] "chrZ_EQ833352_random" "chrZ_EQ833358_random"
## [83] "chrZ_EQ833367_random" "unk"
#
full.ery.pap.ref.fst$Chr<-gsub(".*_random", "unk", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
## [1] "chr1" "chr10" "chr13" "chr1A" "chr2" "chr20" "chr21" "chr24"
## [9] "chr26" "chr27" "chr4" "chr4A" "chr5" "chr6" "chr7" "chr9"
## [17] "chrM" "unk"
full.ery.pap.ref.fst$Chr<-gsub("chrM", "M", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr1", 1, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr1A", "1A", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr2", 2, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr4", 4, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr4A", "4A", full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr5", 5, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr6", 6, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr7", 7, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr9", 9, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr10", 10, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr13", 13, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr20", 20, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr21", 21, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr24", 24, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr26", 26, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-gsub("chr27", 27, full.ery.pap.ref.fst$Chr)
full.ery.pap.ref.fst$Chr<-as.factor(full.ery.pap.ref.fst$Chr)
levels(full.ery.pap.ref.fst$Chr)
## [1] "1" "10" "13" "1A" "2" "20" "21" "24" "26" "27" "4"
## [12] "4A" "5" "6" "7" "9" "M" "unk"
Plot Fst, nothing above .75, which means we likely aren’t in the controlling region
ggplot(data = full.ery.pap.ref.fst) +
geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))
subset outlier snps and show chromosomal position
#subset outlier snps
full.elevated.snps<-full.ery.pap.ref.fst[full.ery.pap.ref.fst$AMOVA.Fst > .5,]
full.elevated.snps<-droplevels(full.elevated.snps)
rownames(full.elevated.snps)<-c()
full.elevated.snps[,1:8]
## X..Locus.ID Pop.1.ID Pop.2.ID Chr BP Column Overall.Pi AMOVA.Fst
## 1 26420 papuana trichroa 1 84482164 84 0.41558 0.53333
## 2 60006 papuana trichroa 1A 5190939 15 0.26154 0.52941
## 3 126092 papuana trichroa 2 150135036 15 0.37166 0.54391
## 4 141016 papuana trichroa unk 31437307 141 0.46271 0.55170
## 5 164657 papuana trichroa unk 93297358 7 0.24603 0.56452
## 6 193526 papuana trichroa 4A 14282704 42 0.48077 0.55556
## 7 193526 papuana trichroa 4A 14282799 137 0.40897 0.65308
## 8 225414 papuana trichroa 7 21914079 80 0.38571 0.59524
## 9 253839 papuana trichroa unk 19316709 98 0.30444 0.59259
more chrom cleaning
#
Chr1<-subset(full.ery.pap.ref.fst,Chr == 1)
Chr1A<-subset(full.ery.pap.ref.fst,Chr == "1A")
Chr2<-subset(full.ery.pap.ref.fst,Chr == 2)
Chr4<-subset(full.ery.pap.ref.fst,Chr == 4)
Chr4A<-subset(full.ery.pap.ref.fst,Chr == "4A")
Chr5<-subset(full.ery.pap.ref.fst,Chr == 5)
Chr6<-subset(full.ery.pap.ref.fst,Chr == 6)
Chr7<-subset(full.ery.pap.ref.fst,Chr == 7)
Chr9<-subset(full.ery.pap.ref.fst,Chr == 9)
Chr10<-subset(full.ery.pap.ref.fst,Chr == 10)
Chr13<-subset(full.ery.pap.ref.fst,Chr == 13)
Chr20<-subset(full.ery.pap.ref.fst,Chr == 20)
Chr21<-subset(full.ery.pap.ref.fst,Chr == 21)
Chr24<-subset(full.ery.pap.ref.fst,Chr == 24)
Chr26<-subset(full.ery.pap.ref.fst,Chr == 26)
Chr27<-subset(full.ery.pap.ref.fst,Chr == 27)
ChrMT<-subset(full.ery.pap.ref.fst,Chr == "M")
Chrunk<-subset(full.ery.pap.ref.fst,Chr == "unk")
full.ery.chr.table<-as.data.frame(table(full.ery.pap.ref.fst$Chr))
names(full.ery.chr.table) <- c("chromosome", "snps")
full.ery.chr.table<-full.ery.chr.table[c(1,5,11,12,13,14,15,16,2,3,6,7,8,9,10,17,18),]
full.ery.chr.table$chromosome
## [1] 1 2 4 4A 5 6 7 9 10 13 20 21 24 26 27 M unk
## Levels: 1 10 13 1A 2 20 21 24 26 27 4 4A 5 6 7 9 M unk
full.ery.chr.table$length<- c(118548696, 156412533, 69780378, 20704505, 62374962,
36305782, 39844632, 27241186, 20806668, 16962381, 15652063, 5979137, 8021379,
4907541, 4618897, 16853, 174341365)
full.ery.chr.table$fst<-c(mean(Chr1$AMOVA.Fst),mean(Chr2$AMOVA.Fst),
mean(Chr4$AMOVA.Fst),mean(Chr4A$AMOVA.Fst),mean(Chr5$AMOVA.Fst),mean(Chr6$AMOVA.Fst),
mean(Chr7$AMOVA.Fst),mean(Chr9$AMOVA.Fst),mean(Chr10$AMOVA.Fst),
mean(Chr13$AMOVA.Fst),mean(Chr20$AMOVA.Fst),mean(Chr21$AMOVA.Fst),
mean(Chr24$AMOVA.Fst),mean(Chr26$AMOVA.Fst),mean(Chr27$AMOVA.Fst),
mean(ChrMT$AMOVA.Fst),mean(Chrunk$AMOVA.Fst))
print table with chromosomal information
full.ery.chr.table
## chromosome snps length fst
## 1 1 9943 118548696 0.02394110
## 5 2 13695 156412533 0.02370993
## 11 4 6250 69780378 0.02474484
## 12 4A 570 20704505 0.03416860
## 13 5 4375 62374962 0.02358386
## 14 6 1980 36305782 0.02298070
## 15 7 2516 39844632 0.02442485
## 16 9 880 27241186 0.02368495
## 2 10 620 20806668 0.02190340
## 3 13 565 16962381 0.02365273
## 6 20 349 15652063 0.02225877
## 7 21 100 5979137 0.03197300
## 8 24 8 8021379 0.02624625
## 9 26 27 4907541 0.02402037
## 10 27 17 4618897 0.04455471
## 17 M 14 16853 0.01287714
## 18 unk 22225 174341365 0.02588512
Print plot of chromosome length vs SNP # then print plot of avg Fst per chromosome, with SNP # above each bar
ggplot(data = full.ery.chr.table, mapping = aes(x = length, y = snps, color = chromosome))+
geom_point() + ggtitle("snp # vs. chromosome length")
xx <- barplot(full.ery.chr.table$fst, xaxt = 'n', xlab = '', width = 0.85, ylim = c(0,.2),
ylab = "Avg. Fst")
## Add text at top of bars
text(x = xx, y = full.ery.chr.table$fst, label = full.ery.chr.table$snps, pos = 3, cex = 0.8, col = "red")
## Add x-axis labels
axis(1, at=xx, labels=full.ery.chr.table$chromosome, tick=FALSE, las=1, line=-0.5, cex.axis=0.5)
Show plots of different ways of visualizing Fst
ggplot(data = full.ery.pap.ref.fst) +
geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))
ggplot(data = full.elevated.snps) +
geom_point(mapping = aes(x = X..Locus.ID, y = AMOVA.Fst, color = Chr))
ggplot(data = full.ery.pap.ref.fst) +
geom_point(mapping = aes(x = X..Locus.ID, y = LOD, color = Chr))
ggplot(data = full.ery.pap.ref.fst) +
geom_point(mapping = aes(x = X..Locus.ID, y = Corrected.AMOVA.Fst, color = Chr))+
theme(legend.position="none")
ggplot(data = full.ery.pap.ref.fst) +
geom_point(mapping = aes(x = X..Locus.ID, y = Smoothed.AMOVA.Fst, color = Chr))+
theme(legend.position="none")
#read in admix files K 3-6
admix3<-t(as.matrix(read.table("~/Dropbox/full.ery/admix3.outfiles.qopt")))
barplot(admix3,col=1:3,space=0,xlab="Individuals",ylab="admixture")
indivs<-c("coloria_28408","coloria_28439","coloria_28562","coloria_28582",
"papuana_12155","papuana_12856","papuana_14618","papuana_14621","papuana_14622",
"papuana_14624","papuana_16064","papuana_16434","pealii_22531","pealii_22532",
"pealii_22533","pealii_24275","pealii_26533","trichroa_12043","trichroa_12054",
"trichroa_12061","trichroa_12062","trichroa_12254","trichroa_12301","trichroa_12846",
"trichroa_16071","trichroa_16075","trichroa_16086","trichroa_16164","trichroa_16234",
"trichroa_16248","trichroa_16315","trichroa_16548","trichroa_18292","trichroa_18295",
"trichroa_18320","trichroa_18327","trichroa_18389","trichroa_18401","trichroa_23619",
"trichroa_27881","trichroa_27882","trichroa_32757","trichroa_32797","trichroa_32805",
"trichroa_32819","trichroa_32834","trichroa_32838","trichroa_34978","trichroa_35027",
"trichroa_4702","trichroa_4766","trichroa_4772","trichroa_4775","trichroa_B32623",
"trichroa_B52739","trichroa_DOT-209")
barplot(admix3,col=1:3,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)
#4
admix4<-t(as.matrix(read.table("~/Dropbox/full.ery/admix4.outfiles.qopt")))
barplot(admix4,col=1:4,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)
#5
admix5<-t(as.matrix(read.table("~/Dropbox/full.ery/admix5.outfiles.qopt")))
barplot(admix5,col=1:5,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)
#6
admix6<-t(as.matrix(read.table("~/Dropbox/full.ery/admix6.outfiles.qopt")))
barplot(admix6,col=1:6,space=0,main="Individuals",ylab="admixture", names.arg = indivs, las = 2, cex.names = .5)
Weird result from NGSadmix at K=3, should be three clean groups… but all K values makes it clear that papuana is not a distinct clade from trichroa.